This notebook demonstrates some of the basic functionality of librosa version 0.4.
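Following through this example, you'll learn how to load audio, compute mel spectrograms, separate harmonic and percussive components, extract chroma and MFCC features, track beats, and compute beat-synchronous features.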
In [ ]:
from __future__ import print_function
In [ ]:
# We'll need numpy for some mathematical operations
import numpy as np
# matplotlib for displaying the output
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline
# and IPython.display for audio output
import IPython.display
# Librosa for audio
import librosa
# And the display module for visualization
import librosa.display
In [ ]:
# Start from the path returned by librosa's example_audio_file() helper
audio_path = librosa.util.example_audio_file()
# or uncomment the line below and point it at your favorite song:
#
# audio_path = '/path/to/your/favorite/song.mp3'
# Load the audio: y is the waveform and sr is its sampling rate
y, sr = librosa.load(audio_path)
By default, librosa will resample the signal to 22050 Hz.
You can change this behavior by saying:
librosa.load(audio_path, sr=44100)
to resample at 44.1 kHz, or
librosa.load(audio_path, sr=None)
to disable resampling.
This first step will show how to compute a Mel spectrogram from an audio waveform.
In [ ]:
# Let's make and display a mel-scaled power (energy-squared) spectrogram.
# The waveform is passed by keyword so the call does not depend on argument order.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

# Convert to log scale (dB). We'll use the peak power as reference.
log_S = librosa.logamplitude(S, ref_power=np.max)

# Make a new figure
plt.figure(figsize=(12, 4))

# Display the spectrogram on a mel scale.
# The sample rate is used to render the time axis (hop length is left at specshow's default).
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

# Put a descriptive title on the plot
plt.title('mel power spectrogram')

# Draw a color bar labelled in dB
plt.colorbar(format='%+02.0f dB')

# Make the figure layout compact
plt.tight_layout()
In [ ]:
# Split the waveform into harmonic and percussive components with librosa.effects.hpss
y_harmonic, y_percussive = librosa.effects.hpss(y)
In [ ]:
# What do the spectrograms look like?
# Let's make mel-scaled power (energy-squared) spectrograms of both components.
# The waveforms are passed by keyword so the calls do not depend on argument order.
S_harmonic = librosa.feature.melspectrogram(y=y_harmonic, sr=sr)
S_percussive = librosa.feature.melspectrogram(y=y_percussive, sr=sr)

# Convert to log scale (dB). We'll use the peak power as reference.
log_Sh = librosa.logamplitude(S_harmonic, ref_power=np.max)
log_Sp = librosa.logamplitude(S_percussive, ref_power=np.max)

# One panel per component: (log spectrogram, title, x-axis type).
# Only the bottom panel gets a time axis; the top one leaves x_axis at its default (None).
panels = [
    (log_Sh, 'mel power spectrogram (Harmonic)', None),
    (log_Sp, 'mel power spectrogram (Percussive)', 'time'),
]

# Make a new figure and draw the panels stacked vertically
plt.figure(figsize=(12, 6))
for row, (log_spec, panel_title, x_axis) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)

    # Display the spectrogram on a mel scale
    librosa.display.specshow(log_spec, sr=sr, x_axis=x_axis, y_axis='mel')

    # Put a descriptive title on the plot
    plt.title(panel_title)

    # Draw a color bar labelled in dB
    plt.colorbar(format='%+02.0f dB')

# Make the figure layout compact
plt.tight_layout()
Next, we'll extract Chroma features to represent pitch class information.
In [ ]:
# Compute a CQT-based chromagram. (An STFT-based alternative is chroma_stft().)
# The harmonic component is used so that transients don't pollute the result.
C = librosa.feature.chroma_cqt(sr=sr, y=y_harmonic)

# Start a fresh figure for the chromagram
plt.figure(figsize=(12, 4))

# Show the energy in each chromatic pitch class as a function of time.
# vmin and vmax are set so that the colors span the full range of chroma values.
librosa.display.specshow(C, y_axis='chroma', x_axis='time', sr=sr, vmin=0, vmax=1)

plt.title('Chromagram')
plt.colorbar()

plt.tight_layout()
Mel-frequency cepstral coefficients are commonly used to represent texture or timbre of sound.
In [ ]:
# Next, we'll extract the top 13 Mel-frequency cepstral coefficients (MFCCs)
# from the log-power mel spectrogram computed earlier
mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)

# Let's pad on the first and second deltas while we're at it
delta_mfcc = librosa.feature.delta(mfcc)
delta2_mfcc = librosa.feature.delta(mfcc, order=2)

# How do they look? We'll show each in its own subplot
plt.figure(figsize=(12, 6))

plt.subplot(3, 1, 1)
librosa.display.specshow(mfcc)
plt.ylabel('MFCC')
plt.colorbar()

plt.subplot(3, 1, 2)
librosa.display.specshow(delta_mfcc)
# Raw strings keep the LaTeX backslash literal instead of relying on
# the invalid '\D' escape sequence, which newer Python versions warn about
plt.ylabel(r'MFCC-$\Delta$')
plt.colorbar()

# Only the bottom panel gets a time axis
plt.subplot(3, 1, 3)
librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time')
plt.ylabel(r'MFCC-$\Delta^2$')
plt.colorbar()

plt.tight_layout()

# For future use, we'll stack these together into one matrix
M = np.vstack([mfcc, delta_mfcc, delta2_mfcc])
In [ ]:
# Now, let's run the beat tracker.
# We'll use the percussive component for this part
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr)

# Let's re-draw the spectrogram, but this time, overlay the detected beats
plt.figure(figsize=(12, 4))
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

# Let's draw transparent lines over the beat frames.
# sr is passed explicitly so the frame-to-time conversion stays correct
# when the audio was loaded at a sampling rate other than the default.
plt.vlines(librosa.frames_to_time(beats, sr=sr),
           1, 0.5 * sr,
           colors='w', linestyles='-', linewidth=2, alpha=0.5)

plt.axis('tight')
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
By default, the beat tracker will trim away any leading or trailing beats that don't appear strong enough.
To disable this behavior, call beat_track() with trim=False.
In [ ]:
# Report the estimated tempo
print('Estimated tempo: %.2f BPM' % tempo)

# Look at the first few detected beats as frame indices...
first_beats = beats[:5]
print('First 5 beat frames: ', first_beats)

# ...and as timestamps, since frame numbers alone don't say when those beats occur
first_beat_times = librosa.frames_to_time(first_beats, sr=sr)
print('First 5 beat times: ', first_beat_times)

# The reverse conversion (times to frame numbers) is available as librosa.time_to_frames()
In [ ]:
# librosa.util.sync will summarize each beat event by the mean feature vector within that beat
M_sync = librosa.util.sync(M, beats)

# Timestamps used as x coordinates for the beat-synchronous plot.
# sr is passed explicitly so the conversion stays correct when the audio
# was loaded at a sampling rate other than the default.
beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats), sr=sr)

# Labels for the three stacked feature groups (raw strings keep the LaTeX backslashes literal)
feature_labels = ['MFCC', r'$\Delta$', r'$\Delta^2$']

plt.figure(figsize=(12, 6))

# Let's plot the original and beat-synchronous features against each other
plt.subplot(2, 1, 1)
librosa.display.specshow(M)
plt.title(r'MFCC-$\Delta$-$\Delta^2$')

# We can also use pyplot *ticks directly
# Let's mark off the raw MFCC and the delta features (one tick every 13 rows)
plt.yticks(np.arange(0, M.shape[0], 13), feature_labels)
plt.colorbar()

plt.subplot(2, 1, 2)
# librosa can generate axis ticks from arbitrary timestamps and beat events also
librosa.display.specshow(M_sync, x_axis='time', x_coords=beat_times)
plt.yticks(np.arange(0, M_sync.shape[0], 13), feature_labels)
plt.title(r'Beat-synchronous MFCC-$\Delta$-$\Delta^2$')
plt.colorbar()

plt.tight_layout()
In [ ]:
# Beat synchronization is flexible.
# Instead of computing the mean delta-MFCC within each beat, let's do beat-synchronous chroma
# We can replace the mean with any statistical aggregation function, such as min, max, or median.
C_sync = librosa.util.sync(C, beats, aggregate=np.median)

# Timestamps used as x coordinates for the beat-synchronous plot.
# sr is passed explicitly so the conversion stays correct when the audio
# was loaded at a sampling rate other than the default.
beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats), sr=sr)

plt.figure(figsize=(12, 6))

# Top panel: the frame-level chromagram
plt.subplot(2, 1, 1)
librosa.display.specshow(C, sr=sr, y_axis='chroma', vmin=0.0, vmax=1.0, x_axis='time')
plt.title('Chroma')
plt.colorbar()

# Bottom panel: the beat-synchronous chromagram, positioned at the beat timestamps
plt.subplot(2, 1, 2)
librosa.display.specshow(C_sync, y_axis='chroma', vmin=0.0, vmax=1.0, x_axis='time',
                         x_coords=beat_times)
plt.title('Beat-synchronous Chroma (median aggregation)')
plt.colorbar()

plt.tight_layout()